Load tidyverse

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.0     √ purrr   0.3.2
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   0.8.3     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Iris data

# Bring the built-in iris data set into the workspace
data("iris")

# Convert the data.frame into a tibble
iris <- as_tibble(iris)

# Print a compact, column-by-column overview of the data
glimpse(iris)
## Observations: 150
## Variables: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9,...
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1,...
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5,...
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1,...
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, s...

Scatter plot

data + mapping + geom

Use + to combine elements.

# Sepal width against sepal length, one colour per species
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()

# Same scatter plot, with point size additionally mapped to petal length
ggplot(
  iris,
  aes(
    x = Sepal.Length,
    y = Sepal.Width,
    colour = Species,
    size = Petal.Length
  )
) +
  geom_point()

data + mapping + geom + scale

Use + to combine elements.

# Scatter plot with a hand-picked colour for each of the three species
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  scale_colour_manual(values = c("blue", "red", "yellow"))

data + mapping + geom + stats

# Scatter plot plus a smoothed trend per species
# (the colour mapping is inherited by both layers)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# An alternative way to build the plot: instead of setting data and mapping
# once in ggplot(), pass them to every layer explicitly.
# The result matches the previous figure except that the points are drawn
# with a fixed size = 2 rather than the default point size.
ggplot() +
  geom_point(
    data = iris,
    aes(x = Sepal.Length, y = Sepal.Width, colour = Species),
    size = 2
  ) +
  stat_smooth(
    data = iris,
    aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

data + mapping + geom + stats + coordinate

# Show only the sub-region 5 <= x <= 7, 2 <= y <= 3 of the plot
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  stat_smooth() +
  coord_cartesian(xlim = c(5, 7), ylim = c(2, 3))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Add description

# Scatter plot with smoother, annotated with a title and axis labels;
# the theme enlarges all text, moves the legend below the panel and
# tilts the x-axis tick labels slightly
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  stat_smooth() +
  labs(
    title = "Width vs Length (Sepal)",
    x = "sepal length",
    y = "sepal width"
  ) +
  theme(
    text = element_text(size = 14),
    legend.position = "bottom",
    axis.text.x = element_text(angle = -10)
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

data + mapping + geom + stats + facet

# One panel per species, stacked as rows
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  stat_smooth() +
  # Alternative layout (species as columns): facet_grid(~Species) +
  facet_grid(rows = vars(Species)) +
  labs(
    title = "Width vs Length (Sepal)",
    x = "sepal length",
    y = "sepal width"
  ) +
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Store and save figures

# Keep the plot in a variable so it can be printed or saved later
p1 <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_point() +
  stat_smooth() +
  labs(
    title = "Width vs Length (Sepal)",
    x = "sepal length",
    y = "sepal width"
  ) +
  theme(
    text = element_text(size = 14),
    legend.position = "bottom",
    axis.text.x = element_text(angle = -10)
  )

# Typing the object's name at top level renders the plot
p1
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#ggsave(p1,filename = "p1.png",width = 7,height=5)

Box plot

# Sepal width distribution per species, drawn as horizontal boxes
ggplot(iris, aes(x = Species, y = Sepal.Width)) +
  geom_boxplot() +
  coord_flip()

Histogram

# Histogram of sepal width using the default binning
ggplot(iris, aes(x = Sepal.Width)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram on the density scale, overlaid with a density estimate drawn as a line
ggplot(iris, aes(x = Sepal.Width, y = stat(density))) +
  geom_histogram() +
  stat_density(geom = "line", position = "identity")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Density plot

# Overlapping, semi-transparent sepal-width densities, one per species
ggplot(iris, aes(x = Sepal.Width, fill = Species, color = Species)) +
  geom_density(alpha = 0.1)

Bar chart

# Number of observations per species
ggplot(iris, aes(x = Species)) +
  geom_bar()

# Compute the percentage of samples with Sepal.Length > 5 for each species
# List of colors: http://sape.inf.usi.ch/quick-reference/ggplot2/colour
# Use the scales package for labeling with percentage
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Share of observations with Sepal.Length > 5 within each species,
# drawn as one labelled bar per species.
iris %>%
  # Flag the observations of interest; the mean of a logical is the proportion TRUE
  mutate(LargeSL = Sepal.Length > 5) %>%
  group_by(Species) %>%
  summarize(prop = mean(LargeSL)) %>%
  ggplot(aes(x = Species, y = prop)) +
  # geom_col() draws bars whose heights are the y values already in the data
  # (the idiomatic form of geom_bar(stat = "identity"))
  geom_col(fill = "coral4", alpha = 0.8) +
  # Percentage label just above each bar; there is a single bar per species,
  # so no position adjustment is needed
  geom_text(
    aes(label = scales::percent(prop, accuracy = 0.1, suffix = "%")),
    size = 3.5,
    vjust = -0.25
  ) +
  ggtitle("Percentage of samples with Sepal.Length > 5 for each species") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  labs(x = "Species", y = "Proportion") +
  # coord_flip() +
  theme_bw()

Arranging graphs into a grid with the R package cowplot

library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
# Sepal measurements: width against length, coloured by species
p1 <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_point() +
  stat_smooth() +
  labs(
    title = "Width vs Length (Sepal)",
    x = "sepal length",
    y = "sepal width"
  ) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = -10)
  )

# Petal measurements: same layout as p1
p2 <- ggplot(
  iris,
  aes(x = Petal.Length, y = Petal.Width, colour = Species)
) +
  geom_point() +
  stat_smooth() +
  labs(
    title = "Width vs Length (Petal)",
    x = "petal length",
    y = "petal width"
  ) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = -10)
  )

# Arrange both plots in one grid
plot_grid(p1, p2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Pairwise plot with GGally

library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
# Pairwise plot matrix over all columns, coloured by species
ggpairs(iris, mapping = aes(colour = Species, alpha = 0.4))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Pairwise plot matrix restricted to the first four columns
ggpairs(
  iris,
  mapping = aes(colour = Species, alpha = 0.4),
  columns = 1:4
)

Interactive plots with plotly

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Build the static ggplot first ...
p1 <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Sepal.Width, colour = Species)
) +
  geom_point() +
  stat_smooth() +
  labs(
    title = "Width vs Length (Sepal)",
    x = "sepal length",
    y = "sepal width"
  ) +
  theme(
    text = element_text(size = 14),
    legend.position = "bottom",
    axis.text.x = element_text(angle = -10)
  )

# ... then hand it to plotly to obtain the interactive version
ggplotly(p1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Correlation plot with corrplot

library(corrplot)
## corrplot 0.84 loaded
# Correlation matrix of the non-Species columns for versicolor only,
# displayed with corrplot.mixed()
iris %>%
  filter(Species == "versicolor") %>%
  select(-Species) %>%
  cor(use = "complete.obs") %>%
  corrplot.mixed(
    main = "Correlations among variables",
    mar = c(0, 0, 2, 0)
  )

Others

The package ggmap provides spatial visualization with ggplot2. The package gganimate enables animation generation with ggplot2.

Errors in package references

If you loaded the package MASS after the package tidyverse, there would be an error when you used the function select.

The reason is that MASS also has a function called select.

Restart the R session and run the following again:

# Demonstration of a name clash: this chunk is EXPECTED to fail.
# MASS is attached after tidyverse, and MASS also has a function called
# select, so the unqualified select() below no longer refers to dplyr's.
library(tidyverse)
library(MASS)

iris %>% 
  filter(Species=="versicolor") %>%
  # Unqualified call on purpose: this is the line that triggers the error
  select(-Species)

# Fix: qualify the call with its namespace so R uses dplyr's select()
iris %>%
  filter(Species == "versicolor") %>%
  dplyr::select(-Species)